home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /*****************************************************************************/
- /* TITLE: SGML Parser */
- /* SYSTEM: Document Processor */
- /* SUBSYSTEM: */
- /* SOURCE FILE: DIDRIVER.C */
- /* AUTHOR: Steve Lindeman, Fred Maples */
- /* */
- /* This is a subsystem of a Standard Generalized Markup Language */
- /* (SGML) parser conforming to ISO 8879-1986(E). This subsystem */
- /* will be referred to as the "instance parser" because it processes */
- /* the actual instance of the tagged document rather than the Document */
- /* Type Definition. This parser assumes the Core Concrete Syntax */
- /* and the OMITTAG and FORMAL features of SGML. */
- /* */
- /* First the command line options are read to determine what */
- /* functions of the parser are requested. A full listing of options */
- /* is documented in 'readme.par', among other places. */
- /* */
- /* This parser can output an ASCII Text File defined to be a */
- /* Canonical Test Result (CTR) that could actually determine if the */
- /* parsing was done correctly. SGML is an input only language. There */
- /* are no requirements in the language that state what the output */
- /* of the parser must be. It is required only to report an error if */
- /* one exists in the document, and not report an error if one does */
- /* not exist. A description of the CTR language can also be found in */
- /* external documentation. */
- /* */
- /* DATE CREATED: July 13, 1987 */
- /* LAST MODIFIED: */
- /* */
- /* REVISIONS */
- /* WHEN WHO WHY */
- /* */
- /*****************************************************************************/
-
- #include <stdio.h>
- #include "didefs.h"
- #include "diglobal.h"
-
- main(argc,argv)
- int argc;
- char *argv[];
- {
- STENTRY *tp; /* pointer to symbol table entry */
- TNODE *newcm; /* ptr to root of newly created content model */
- ENTITYDESC *genthead,*penthead;
- ID_IDREF_DESC *idrefp;
- char path[PATHLEN];
- int token;
- STATUS retval;
- TKNRETVAL tknretval;
- BOOLEAN root_minimized,delete_temps,bld_ctr;
-
- /* The initialization of the instance-parser involves reading in a
- hiearchical representation of what the instance of the document may
- contain. This tree-like structure is traversed as the execution of
- the parser progresses. There are always exceptions to the rules
- (e.g. ANY declared content, exclusion or inclusion exceptions) that
- may cause the tree traversals to look quite strange. Once the entire
- document is processed, control is returned to this main and a cross
- reference is made of all the IDs and IDREFs made during the parse to
- ensure there is an ID for every IDREF. There is no requirement that
- there be an IDREF for every ID. */
-
- checkopt(argc,argv,path,&delete_temps,&bld_ctr);
- init(&numsym,&genthead,&penthead,path); /* build symtable and trees */
- gettilnosep(); /* ??? */
- tknretval = gettoken(&tp,&token,genthead,penthead,&dontcare); /* get root token */
- if (tknretval==MARKUP_FOUND && token!=rootid)
- ungettoken(token,tp);
- if (tknretval==TEXT || token!=rootid) {
- root_minimized = TRUE;
- tp = perform_roottag_mini(rootid);
- }
- else
- root_minimized = FALSE;
- newcm = pushcreate(tp);
- if ((retval=traverse(newcm,tp,genthead,penthead,&dontcare)) == NFSH) {
- if (tknretval == TEXT)
- sprintf(error_msg,"%s%s%s","\nError: Invalid data, last opened element '",tp->nametoken,"'.\n");
- else
- sprintf(error_msg,"%s%s%s","\nError: Invalid tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- /* check to make sure the element has content */
- if (retval==NFDHT && root_minimized) {
- sprintf(error_msg,"%s'%s'.\n","\nError: Invalid Starttag Minimization on tag ",tp->nametoken);
- FATAL_ERROR()
- }
- if (EMPTY_CONTENT(newcm)) { /* can't have endtag for EMPTY */
- putstr_outbuf("\n[/");
- putstr_outbuf(symtable[rootid].nametoken);
- place_in_queue(END_TAG_NAME,symtable[rootid].nametoken,"");
- putstr_outbuf("]");
- token = rootid | HIGHBIT; /* don't change tp */
- tknretval = MARKUP_FOUND;
- }
- else
- tknretval = gettoken(&tp,&token,genthead,penthead,&dontcare); /* must be end tag */
- if (tknretval==TEXT || IS_STARTTAG(token) || IS_ENDTAG_NOTEQ(token,rootid))
- resolve_endtag((TNODE *) NULL,token,tp,&retval,tknretval,genthead,penthead,rootid);
- flush_buf();
- popfree(newcm); /* through with this content model */
- if ((idrefp=cross_id_idref(idhead,idrefhead)) != NULL) {
- sprintf(error_msg,"%s'%s'.\n","\nError: Unsuccessful crossref for ID-IDREF ",idrefp->name);
- FATAL_ERROR()
- }
- gettilnosep();
- if (our_fgetc(indoc) != EOF)
- ourexit(2,"\nInvalid additional text in document.\n");
- if (fclose(indoc) != 0) {
- printf("Unable to close input document.\n");
- exit(99);
- }
- if (bld_ctr)
- if (fclose(ctrfp) != 0) {
- printf("Unable to close CTR file.\n");
- exit(99);
- }
- if (delete_temps)
- delete_files(path);
- printf("\nNormal program termination.\n");
- }
-
- /*------------------------------------------------------*/
- /* D O O P E R A T O R */
- /* This routine processes a tree node that */
- /* contains an operator. However, we don't */
- /* which operator yet. */
- /* */
- /* returns -- NFDHT, FOUND, NFSH */
- /*------------------------------------------------------*/
- STATUS dooperator(ptr,tp,genthead,penthead)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- {
- STATUS retval,prevretval;
-
- prevretval = NFDHT; /* anything but FOUND */
- while(testoi(ptr) != OI_IS_NULL) {
- switch(ptr->nodeid) {
- case COMMA:
- retval = exe_seq(ptr,tp,genthead,penthead);
- break;
- case OR:
- retval = exe_or(ptr,tp,genthead,penthead);
- break;
- case AND:
- retval = exe_and(ptr,tp,genthead,penthead);
- break;
- }
-
- /* Once you've found one occurrence of that token, an attempt to find
- another must be thought of as FOUND and not NFDHT. If it was mis-
- interpreted as NFDHT, the parsing might incorrectly attempt to parse
- right-hand side of an OR group. */
-
- if ((prevretval==FOUND) && (retval==NFDHT))
- retval = FOUND;
- prevretval = retval;
- }
- return(retval);
- }
-
- /*------------------------------------------------------*/
- /* D O O P E R A N D */
- /* This content model is not fully defined */
- /* here; there is another content model that */
- /* would logically be a child of this one. */
- /* */
- /* returns -- NFDHT, NFSH, FOUND */
- /*------------------------------------------------------*/
- STATUS dooperand(ptr,tp,genthead,penthead)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- {
- /* Correction to problem reported by LES GONDOR */
- STATUS retval = NFDHT;
- int token;
- TKNRETVAL tknretval;
-
- while (testoi(ptr) != OI_IS_NULL) { /* stay down in tree til node's resolved */
- tknretval = gettoken(&tp,&token,genthead,penthead,&dontcare);
- if (tknretval == TEXT)
- resolve_starttag(ptr,&retval,genthead,penthead,tknretval,token);
- else
- if (ptr->nodeid == token && !find_except(currexcl,token)) /* token is starttag */
- retval = exe_new_conmod(tp,genthead,penthead,ptr,FALSE);
- else {
- ungettoken(token,tp); /* no match, assume needed elsewhere */
- resolve_starttag(ptr,&retval,genthead,penthead,tknretval,token);
- }
- } /* end while */
- return(retval);
- }
-
- /*------------------------------------------------------*/
- /* D O T E R M I N A L */
- /* This content model has been defined */
- /* down to it's lowest level possible */
- /* */
- /* returns -- NFDHT, FOUND, NFSH */
- /*------------------------------------------------------*/
- STATUS doterminal(ptr,genthead,penthead)
- TNODE *ptr;
- ENTITYDESC *genthead,*penthead;
- {
- register STATUS retval;
-
- /* If an attempt to get a token (a tag) has been made and it was not
- successfully matched as a valid token in the tree, it can be "put
- back" and re-read later. If a token has been put back, an attempt to
- read data should be thought of as NFDHT in order to keep from
- incorrectly reading up characters of data. */
-
- if (state == GETNEW)
- switch(ptr->nodeid) {
- case CDATA:
- retval = getcdata();
- break;
- case RCDATA:
- retval = getrcdata(genthead,TRUE,&dontcare,TRUE);
- break;
- case PCDATA:
- retval = getpcdata(genthead,penthead);
- break;
- case EMPTY:
- /* fix from Fred */
- retval = FOUND;
- break;
- default:
- software_fault();
- }
- else
- retval = NFDHT; /* if state=GETOLD don't try to get data */
- return(retval);
- }
-
- /*------------------------------------------------------*/
- /* E X E _ N E W _ C O N M O D */
- /* This routine is executed when a new content */
- /* model has been encountered and thus the parse */
- /* needs to continue accordingly. A pointer to */
- /* token is pushed on the stack, and parse */
- /* begins traversing on the newly created content */
- /* model. */
- /*------------------------------------------------------*/
- STATUS exe_new_conmod(tp,genthead,penthead,ptr,perform_strttag)
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- TNODE *ptr;
- BOOLEAN perform_strttag;
- {
- int token;
- TKNRETVAL tknretval;
- STATUS retval;
- TNODE *newcm;
-
- /* must create a new content model to allow recursive
- definitions. then go traverse on the new content model */
- newcm = pushcreate(tp);
- if ((retval=traverse(newcm,tp,genthead,penthead,&dontcare)) == NFSH) {
- if (tknretval == TEXT)
- sprintf(error_msg,"%s%s%s","\nError: Invalid data, last opened element '",tp->nametoken,"'.\n");
- else
- sprintf(error_msg,"%s%s%s","\nError: Invalid tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- else
- /* It is an error to have done start-tag minimization on an element
- that contains empty content. */
- if (retval==NFDHT && perform_strttag==TRUE) {
- sprintf(error_msg,"%s%s%s","\nError: Invalid Starttag Minimization on tag '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- else {
-
- /* This prints out the endtag for elements that are required to have
- no endtag possibly through tag minimization, EMPTY declared content,
- or content reference attributes. */
- if (EMPTY_CONTENT(newcm)) { /* can't have endtag for EMPTY */
- putstr_outbuf("\n[/");
- putstr_outbuf(tp->nametoken);
- place_in_queue(END_TAG_NAME,tp->nametoken,"");
- putstr_outbuf("]");
- token = ptr->nodeid | HIGHBIT; /* don't change tp */
- tknretval = MARKUP_FOUND;
- }
- else
- tknretval = gettoken(&tp,&token,genthead,penthead,&dontcare);
-
- /* If you were looking for an endtag for an element and actually found
- data instead, an attempt must be made to minimize the end tag. The
- only thing involved in the attempt is verifying that endtag minimization
- was declared as legal on its ELEMENT declaration. */
-
- if (tknretval == TEXT)
- resolve_endtag(ptr,token,tp,&retval,tknretval,genthead,penthead,newcm->nodeid);
- else
- if (IS_ENDTAG(token) && IS_ENDTAG_EQ(token,ptr->nodeid)) {
- decroi(ptr);
- retval = FOUND;
- }
- else
- resolve_endtag(ptr,token,tp,&retval,tknretval,genthead,penthead,ptr->nodeid);
- }
- popfree(newcm); /* through with this content model */
- return(retval);
- }
-
- /*------------------------------------------------------*/
- /* E X E _ A N D */
- /* This routine processes the AND operator. */
- /* ANDs are stored as circularly linked lists */
- /* with each element in the list representing */
- /* an element in the AND group. Control keeps */
- /* circling the list until all are matched */
- /* or an entire circle was made and no match */
- /* was made. */
- /* */
- /* returns -- FOUND, NFDHT, NFSH */
- /*------------------------------------------------------*/
- STATUS exe_and(ptr,tp,genthead,penthead)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- {
- unsigned num_ands,num_proc;
- STATUS retval;
- BOOLEAN foundone;
- register TNODE *currp;
-
- /* Count the number of tokens in the AND circular linked lists. */
- num_ands = 1;
- for (currp=ptr->u.llptr; currp->next!=ptr->u.llptr; currp=currp->next)
- num_ands++;
-
- num_proc = 0;
- foundone = FALSE;
- for (currp=ptr->u.llptr; num_ands>0 && num_ands!=num_proc; currp=currp->next)
-
- /* This node will be visited if it has NOT already been satisfied. The
- occurrence indicator was set to OI_IS_NULL when it was satisfied. The AND
- parsing continues until all nodes are satisfied or you go all the way
- around the list without satisfying any nodes. */
-
- if (testoi(currp) != OI_IS_NULL)
- switch(retval=traverse(currp,tp,genthead,penthead,&dontcare)) {
- case NFSH:
- case NFDHT:
- num_proc++;
- restoreoi(currp);
- break;
- case FOUND:
- num_proc = 0;
- num_ands--;
- foundone = TRUE;
- break;
- }
- if (num_ands == 0)
- retval = FOUND;
- else {
- /* It could have been that some items in the AND group were satisfied
- and some were not. This determines what STATUS the entire AND group
- should resolve to based on the priority: FOUND, NFDHT, then NFSH
- taking into account whether or not the nodes were satisfied. */
-
- retval = NFDHT; /* assume NFDHT, until you find out different */
-
- for (currp=ptr->u.llptr; currp->next!=ptr->u.llptr; currp=currp->next)
- solveand(&retval,currp,ptr);
- solveand(&retval,currp,ptr); /* don't forget about last one in list */
- }
- switch(retval) {
- case FOUND:
- decroi(ptr);
- break;
- case NFDHT:
- NULLOI(ptr);
- break;
- case NFSH:
- if (testoi(ptr)==OPT && !foundone)
- retval = NFDHT;
- NULLOI(ptr);
- if (foundone) { /* at least one was FOUND, at least one was NFSH */
- sprintf(error_msg,"%s%s%s","\nError: Invalid or missing tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- break;
- }
-
- /* restore everything in '&' list except '&' node */
- if (testoi(ptr) != OI_IS_NULL) {
- for (currp=ptr->u.llptr; currp->next!=ptr->u.llptr; currp=currp->next)
- restoreoi(currp);
- restoreoi(currp);
- }
- return(retval);
- }
-
- /*--------------------------------------------------------------*/
- /* E X E _ O R */
- /* This routine processes an OR operator found in the */
- /* content model tree. If left side is satisfied, the */
- /* right side is not processed. If the left side is not */
- /* satisfied, the right side must be. OR operator means */
- /* "one or the other, but not both". The OR operator */
- /* is both commutative and associative. That is: */
- /* (A|B|C) == ((A|B)|C) == (A|(B|C)) */
- /* Thus we have implemented all OR groups as binary trees. */
- /* */
- /* returns -- FOUND, NFDHT, NFSH */
- /*--------------------------------------------------------------*/
- STATUS exe_or(ptr,tp,genthead,penthead)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- {
- STATUS retval;
-
- switch(traverse(ptr->left,tp,genthead,penthead,&dontcare)) {
- /* If you went left in the OR group and found the token, you cannot
- make an attempt to parse the right-hand side. */
- case FOUND:
- decroi(ptr);
- retval = FOUND;
- if (testoi(ptr) != OI_IS_NULL)
- restoreoi(ptr->left); /* restore only left, never went right */
- break;
- /* If you went left in an OR group and didn't satisfy the token, you
- MUST make an attempt to parse the right-hand side. */
- case NFSH:
- case NFDHT:
- switch(traverse(ptr->u.right,tp,genthead,penthead,&dontcare)) {
- case FOUND:
- retval = FOUND;
- decroi(ptr); /* found on right, decrement operator o.i */
- if (testoi(ptr) != OI_IS_NULL) {
- restoreoi(ptr->left); /* rebuild left and right subtrees */
- restoreoi(ptr->u.right);
- }
- break;
- case NFDHT:
- NULLOI(ptr);
- retval = NFDHT;
- break;
- case NFSH: /* not found left or right, see if node's optional */
- retval = (testoi(ptr) == OPT) ? NFDHT : NFSH;
- NULLOI(ptr); /* cannot have anymore */
- break;
- }
- break;
- }
- return(retval);
- }
-
- /*--------------------------------------------------------------*/
- /* E X E _ S E Q */
- /* This routine processes an COMMA operator found in */
- /* the content model tree. First the left side is */
- /* processed, then right side is processed. The SEQ */
- /* operator is associative but NOT commutative. */
- /* That is: (A,B,C) == ((A,B),C) == (A,(B,C)) */
- /* Thus we have implemented all SEQ groups as binary */
- /* trees. */
- /* */
- /* returns -- FOUND, NFDHT, NFSH */
- /*--------------------------------------------------------------*/
- STATUS exe_seq(ptr,tp,genthead,penthead)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- {
- register STATUS retval;
-
- switch(traverse(ptr->left,tp,genthead,penthead,&dontcare)) {
- case FOUND:
- change_right(ptr->u.right,FOUND);
- switch(traverse(ptr->u.right,tp,genthead,penthead,&dontcare)) {
- case NFDHT:
- case FOUND: /* OK left and right */
- retval = FOUND;
- decroi(ptr);
- if (testoi(ptr) != OI_IS_NULL) { /* restore only if more can occur */
- restoreoi(ptr->left);
- restoreoi(ptr->u.right);
- }
- break;
- default:
- sprintf(error_msg,"%s%s%s","\nError: Invalid or missing tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- break;
- }
- break;
- case NFSH:
- /* not found left, if node OPT then ok, else assume error */
- retval = (testoi(ptr) == OPT) ? NFDHT : NFSH;
- NULLOI(ptr);
- break;
- case NFDHT:
- change_right(ptr->u.right,NFDHT);
- switch(traverse(ptr->u.right,tp,genthead,penthead,&dontcare)) {
- case FOUND:
- retval = FOUND;
- decroi(ptr); /* found on right, decrement operator o.i */
- if (testoi(ptr) != OI_IS_NULL) {
- restoreoi(ptr->left); /* rebuild left and right subtrees */
- restoreoi(ptr->u.right);
- }
- break;
- case NFDHT: /* NFDHT on both left and right, inherently optional */
- NULLOI(ptr);
- retval = NFDHT;
- break;
- case NFSH:
- /* not found right, see if optional */
- retval = (testoi(ptr) == OPT) ? NFDHT : NFSH;
- NULLOI(ptr);
- break;
- }
- break;
- }
- return(retval);
- }
-
- /*------------------------------------------------------*/
- /* R E S O L V E _ E N D T T A G _ M I N I */
- /* This routine attempts to perform endtag */
- /* minimization because a tag was encountered */
- /* that was found in the symbol table but did */
- /* not correspond to the expected tag. In order */
- /* to minimize the tag, all that is required is */
- /* that minimization was enabled on the DTD */
- /* element declaration. */
- /*------------------------------------------------------*/
- void resolve_endtag(ptr,token,tp,retval,tknretval,genthead,penthead,endtok)
- TNODE *ptr;
- int token;
- STENTRY *tp;
- STATUS *retval;
- TKNRETVAL tknretval;
- ENTITYDESC *genthead,*penthead;
- int endtok;
- {
- int token2;
- STENTRY *opened_tp;
- TNODE *newcm;
-
- /* If the token that was read is a valid inclusion exception, you should
- traverse the tree for that token as far as possible, then attempt to
- resolve the missing endtag that got you here. */
-
- if (tknretval==MARKUP_FOUND && find_except(currincl,token) && !find_except(currexcl,token)) {
- newcm = pushcreate(tp);
- if ((*retval = traverse(newcm,tp,genthead,penthead,&dontcare)) == NFSH) {
- if (tknretval == TEXT)
- sprintf(error_msg,"%s%s%s","\nError: Invalid data, last opened element '",tp->nametoken,"'.\n");
- else
- sprintf(error_msg,"%s%s%s","\nError: Invalid tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
-
- /* check to make sure the element has content */
- if (EMPTY_CONTENT(newcm)) { /* can't have endtag for EMPTY */
- token2 = token | HIGHBIT;
- (*print_ctr)(ctrfp,"\n[/%s]",tp->nametoken);
- (*applic)(END_TAG_NAME,tp->nametoken,"");
- }
- else
- tknretval = gettoken(&tp,&token2,genthead,penthead,&dontcare); /* must be end tag */
- flush_buf();
- if (IS_STARTTAG(token2) || IS_ENDTAG_NOTEQ(token2,token))
- resolve_endtag(tp->cmptr,token2,tp,retval,tknretval,genthead,penthead,token);
-
- tknretval = gettoken(&tp,&token2,genthead,penthead,&dontcare); /* must be end tag */
- if (IS_STARTTAG(token2) || IS_ENDTAG_NOTEQ(token2,endtok))
- resolve_endtag(tp->cmptr,token2,tp,retval,tknretval,genthead,penthead,endtok);
- if (ptr != NULL) /* don't decrement for root tag */
- decroi(ptr);
-
- popfree(newcm); /* through with this content model */
- }
- else {
- /* If endtag minimization was declared on the ELEMENT declaration */
- opened_tp = lookstack();
- if (opened_tp->miniexcept & ENDMINI_MASK) {
- (*print_ctr)(ctrfp,"\n[/%s]",opened_tp->nametoken);
- (*applic)(END_TAG_NAME,opened_tp->nametoken,"");
- if (ptr != NULL) /* don't decrement for root tag */
- decroi(ptr);
- *retval = FOUND;
- if (tknretval == MARKUP_FOUND)
- ungettoken(token,tp);
- }
- else {
- if (tknretval == TEXT)
- sprintf(error_msg,"%s%s%s","\nError: Invalid data, last opened element '",opened_tp->nametoken,"'.\n");
- else
- sprintf(error_msg,"%s%s%s","\nError: Invalid tag, last opened element '",opened_tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- }
- return;
- }
-
- /*------------------------------------------------------*/
- /* P E R F O R M _ R O O T T A G _ M I N I */
- /* This routine attempts to perform starttag */
- /* minimization because a tag was encountered */
- /* that was found in the symbol table but did */
- /* not correspond to the expected tag. In order */
- /* to minimize the tag, minimization must have */
- /* been enabled on the DTD element declaration */
- /* and the tag must be contextually required */
- /* relative to the tags already encountered. */
- /*------------------------------------------------------*/
- STENTRY *perform_roottag_mini(roottoken)
- int roottoken;
- {
- STENTRY *tp;
-
- if ((tp=linsrch(symtable,roottoken,numsym)) == NULL)
- software_fault();
-
- /* Can't have the root element included from an exception */
- if (tp->miniexcept & STARTMINI_MASK) {
- unprocess(tp->adptr);
- (*print_ctr)(ctrfp,"\n[%s",tp->nametoken);
- (*applic)(TAG_NAME,tp->nametoken,"");
-
- if (req_not_proc(tp->adptr) == TRUE) {
- sprintf(error_msg,"%s%s%s","\nError: REQUIRED or CURRENT attribute not specified '",tp->adptr->attrname,"'.\n");
- FATAL_ERROR()
- }
- if (resolve_attr(tp->adptr,TRUE) > GRPCNT)
- ourexit(2,"\nError: Total number of idrefs and idreflist > GRPCNT\n");
- (*put_ctr)(']',ctrfp);
- (*applic)(TAG_END,"","");
- }
- else {
- sprintf(error_msg,"%s%s%s","\nError: Invalid document element tag for '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
- return(tp);
- }
-
- /*------------------------------------------------------*/
- /* R E S O L V E _ S T A R T T A G */
- /* This routine attempts to perform starttag */
- /* minimization because a tag was encountered */
- /* that was found in the symbol table but did */
- /* not correspond to the expected tag. In order */
- /* to minimize the tag, minimization must have */
- /* been enabled on the DTD element declaration */
- /* and the tag must be contextually required */
- /* relative to the tags already encountered. */
- /*------------------------------------------------------*/
- void resolve_starttag(ptr,retval,genthead,penthead,tknretval,token)
- TNODE *ptr;
- STATUS *retval;
- ENTITYDESC *genthead,*penthead;
- TKNRETVAL tknretval;
- int token;
- {
- STENTRY *tp;
- int token2; /* end tag token for exclusion handling */
- TNODE *newcm;
-
- /* If the token that was read is a valid inclusion exception, you should
- traverse the tree for that token as far as possible, then attempt to
- resolve the missing starttag that got you here. */
-
- if (tknretval==MARKUP_FOUND && find_except(currincl,token) && !find_except(currexcl,token)) {
- tknretval = gettoken(&tp,&token,genthead,penthead,&dontcare);
- newcm = pushcreate(tp);
-
- if ((*retval = traverse(newcm,tp,genthead,penthead,&dontcare)) == NFSH) {
- if (tknretval == TEXT)
- sprintf(error_msg,"%s%s%s","\nError: Invalid data, last opened element '",tp->nametoken,"'.\n");
- else
- sprintf(error_msg,"%s%s%s","\nError: Invalid tag, last opened element '",tp->nametoken,"'.\n");
- FATAL_ERROR()
- }
-
- /* check to make sure the element has content */
- if (EMPTY_CONTENT(newcm)) { /* can't have endtag for EMPTY */
- token2 = token | HIGHBIT;
- putstr_outbuf("\n[/");
- place_in_queue(END_TAG_NAME,tp->nametoken,"");
- putstr_outbuf(tp->nametoken);
- putstr_outbuf("]");
- }
- else
- tknretval = gettoken(&tp,&token2,genthead,penthead,&dontcare); /* must be end tag */
-
- flush_buf();
- if (IS_STARTTAG(token2) || IS_ENDTAG_NOTEQ(token2,token))
- resolve_endtag(tp->cmptr,token2,tp,retval,tknretval,genthead,penthead,token);
- popfree(newcm); /* through with this content model */
- }
- else
- /* The token must be contextually required and have starttag minimization
- declared as legal on the ELEMENT declaration. */
- if ((ptr->contreq!=C_NEVERO) && (symtable[ptr->nodeid].miniexcept & STARTMINI_MASK) &&
- ptr->contreq!=C_SOMETIMESO && !find_except(currexcl,ptr->nodeid)) {
- unprocess(symtable[ptr->nodeid].adptr);
- ptr->contref_attr = FALSE;
- (*print_ctr)(ctrfp,"\n[%s",symtable[ptr->nodeid].nametoken);
- (*applic)(TAG_NAME,symtable[ptr->nodeid].nametoken,"");
-
- if (req_not_proc(symtable[ptr->nodeid].adptr) == TRUE) {
- sprintf(error_msg,"%s%s%s","\nError: REQUIRED or CURRENT attribute not specified '",symtable[ptr->nodeid].adptr->attrname,"'.\n");
- FATAL_ERROR()
- }
- if (resolve_attr(symtable[ptr->nodeid].adptr,TRUE) > GRPCNT)
- ourexit(2,"\nError: Total number of idrefs and idreflist > GRPCNT\n");
- if (symtable[ptr->nodeid].adptr == NULL)
- (*put_ctr)(']',ctrfp);
- else
- (*print_ctr)(ctrfp,"\n]");
- (*applic)(TAG_END,"","");
- *retval = exe_new_conmod(&(symtable[ptr->nodeid]),genthead,penthead,ptr,TRUE);
- }
- else {
- if (*retval != FOUND) /* if FOUND once, stay FOUND forever */
- *retval = (testoi(ptr) == OPT) ? NFDHT : NFSH;
- NULLOI(ptr); /* null so will know not to restore */
- }
- return;
- }
-
- /*------------------------------------------------------*/
- /* S O L V E A N D */
- /* Determines if the AND linked list should */
- /* resolve to NFDHT, FOUND, NFSH. It */
- /* changes the retval to represent that. */
- /*------------------------------------------------------*/
- void solveand(retval,currp,ptr)
- STATUS *retval;
- TNODE *currp,*ptr;
- {
- switch(*retval) {
- case NFDHT:
- switch(testoi(currp)) {
- case PLUS:
- case ONE:
- *retval = NFSH;
- break;
- case OI_IS_NULL:
- *retval = FOUND;
- break;
- }
- break;
- case FOUND:
- switch(testoi(currp)) {
- case PLUS:
- case ONE:
- *retval = NFSH;
- break;
- }
- break;
- }
- if (*retval==FOUND && testoi(ptr)==OPT)
- incroi(ptr);
- return;
- }
-
- /*------------------------------------------------------*/
- /* T R A V E R S E */
- /* Here we determine whether this node */
- /* is an operator, operand, or terminal. */
- /* */
- /* returns -- NFDHT, FOUND, NFSH */
- /*------------------------------------------------------*/
- STATUS traverse(ptr,tp,genthead,penthead,firsttime)
- TNODE *ptr;
- STENTRY *tp;
- ENTITYDESC *genthead,*penthead;
- BOOLEAN *firsttime;
- {
- register STATUS retval;
-
- /* If the starttag had an explicit content reference attribute, its
- content must be empty, so assume the traversal was successful and
- return. */
-
- if (ptr->contref_attr == TRUE) {
- retval = FOUND;
- NULLOI(ptr);
- }
- else {
- switch(ptr->nodeid) {
- case COMMA:
- case AND:
- case OR:
- retval = dooperator(ptr,tp,genthead,penthead);
- break;
- case CDATA:
- case RCDATA:
- case PCDATA:
- case EMPTY:
- retval = doterminal(ptr,genthead,penthead);
- break;
- default:
- retval = dooperand(ptr,tp,genthead,penthead);
- break;
- }
- if (retval == FOUND)
- *firsttime = TRUE;
- }
- return(retval);
- }
-